* ---- Session setup -------------------------------------------------------------------------------
* Close any log that is still open; -capture- swallows the error when none is.
capture log close
* Remove data and other objects from memory before starting.
clear all
* Never pause output with a --more-- prompt.
set more off
* Move to the directory held in the global master_dir and open this script's log
* (overwritten on every run) in the directory held in the global log_dir.
* Both globals must be defined before this do-file is run.
cd "${master_dir}"
log using "${log_dir}/1c-IdentifyLikelyVoters.log", replace
***************************************************************************************************
* 
* Program: 1c-IdentifyLikelyVoters.do
* Purpose: Construct various "likely voter" measures (for election & placebo dates)
* Sections:
*     1. Construct likely voter measures [as well as placebo for all days in sample]
*     2. Repeat for each radius option (10m to 100m) on Election Day
* Files Used:
*     1. Pings_all_days.dta
*     2. PollingPlaces2016_w_TimeZones_and_Buildings.dta
* Files Created:
*     1. 1c-IdentifyLikelyVoters.log
*     2. likelyvoters.dta
*     3. likelyvoters_rad10.dta - likelyvoters_rad100.dta
*
***************************************************************************************************

***************************************************************************************************
*  1. Construct likely voter measures [as well as placebo for all days in sample]
***************************************************************************************************

cd "${master_dir}"

* Load only the pings whose distance to the polling place is at most 60.
* (Dist_to_PollingPlace_M is presumably measured in metres, judging by the _M suffix
* and the "10m to 100m" wording in the file header -- confirm against the data docs.)
use ID_11_16 PollingPlace_ID day local_date_sec Sec_Since_Last Sec_Till_Next_Ping ///
  Dist_to_PollingPlace_M if Dist_to_PollingPlace_M <= 60 ///
  using "${data_dir}/Pings_all_days.dta", clear

* Construct lower/upper bound wait time measures for every Individual / Polling Place / Day
* combination. The identifier is always written out in full (ID_11_16) so the code does not
* depend on -set varabbrev on- or on no other variable ever sharing the same prefix.

* Lower bound: time between the first and the last retained ping of the combination.
* local_date_sec is divided by 1000 to obtain seconds, so it is presumably stored in
* milliseconds -- TODO confirm.
gegen double earliestping = min(local_date_sec), by(ID_11_16 PollingPlace_ID day)
gegen double latestping   = max(local_date_sec), by(ID_11_16 PollingPlace_ID day)
gen double lowerbound = (latestping - earliestping)/1000

* Gap before the first ping: take Sec_Since_Last on the earliest-ping row(s) and spread the
* (maximum) value to every row of the combination.
gen double gap_before_tmp = Sec_Since_Last if local_date_sec == earliestping
gegen double sec_before_earliest_ping = max(gap_before_tmp), by(ID_11_16 PollingPlace_ID day)
drop gap_before_tmp

* Gap after the last ping: same construction using Sec_Till_Next_Ping on the latest-ping row(s).
gen double gap_after_tmp = Sec_Till_Next_Ping if local_date_sec == latestping
gegen double sec_after_latest_ping = max(gap_after_tmp), by(ID_11_16 PollingPlace_ID day)
drop gap_after_tmp

* Upper bound: lower bound plus both surrounding gaps. It is missing whenever either gap
* is missing.
gen double upperbound = lowerbound + sec_before_earliest_ping + sec_after_latest_ping

* Now, we drop any Individual / Polling Place / Day combination whose upper bound is
* 60 seconds or less, or whose upper bound is missing.
drop if upperbound <= 60 | upperbound == .

* Count the number of distinct poll & day combos a person has across all days.
* tag() marks exactly one row per ID / Day / Polling Place, so summing the tag within
* a person counts that person's distinct combinations.
* The identifier is written out in full (ID_11_16) so the code does not rely on
* variable-name abbreviation.
gegen tag_IDdaypoll = tag(ID_11_16 day PollingPlace_ID)
gegen totalpolldaycount = sum(tag_IDdaypoll), by(ID_11_16)

* To keep the data manageable in size, keep one row per ID / Day / Polling Place
keep if tag_IDdaypoll == 1

* Create 2 versions of "Likely Voter" Filter. Stricter version (v2) is a subset of Weak version: (v1)
* Strict version: Can't have any spells longer than 60 seconds in any polling places except
* on Election Day (and can only have it at one polling place on Election Day).
* In code: the flag for day X is 1 when the person's single remaining poll/day combination
* in the whole sample falls on X November 2016. It is built for days 01-15 so the same
* filter can be applied to placebo days (day 08 is the one used as Election Day in Section 2).
foreach X in 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 {
    gen likelyvoter_v2_d`X' = (totalpolldaycount == 1 & day == td(`X'nov2016))
}

* Weak version: Can't show up at more than 1 polling place on election day. Also can't show up at
* that polling place on any other day than election day (but can show up at other places on other
* days).
* Three steps to this:
* (1) How many polling places do you show up at on Election Day? If more than 1, likelyvoter_v1 = 0
* (2) Which polling place is your "Election Day" polling place?
* (3) Do you show up at that election day polling place on any other day? If yes, likelyvoter_v1 = 0
* Do this for all other days too (so can do this on Placebo analysis)

* Data are one row per ID / Day / Polling Place here (tag_IDdaypoll == 1 on every row), so these
* sums count polling places per person-day and days per person-poll respectively.
gegen totalpolls_per_day = sum(tag_IDdaypoll), by(ID_11_16 day)
gegen totaldays_per_poll = sum(tag_IDdaypoll), by(ID_11_16 PollingPlace_ID)

foreach X in 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 {
    * Count the number of polls visited on Election (or corresponding Placebo) Day.
    gen totalpolls_per_day`X't = totalpolls_per_day if day == td(`X'nov2016)
    * Assign this count to every observation for the person, so we can use it as a condition.
    * (Missing for people with no row on that day, which makes the flag below 0.)
    gegen totalpolls_per_day`X' = max(totalpolls_per_day`X't), by(ID_11_16)
    drop totalpolls_per_day`X't
    * For an individual who visits only 1 poll on that day, how many days total do they visit it?
    gen totaldays_per_poll_day`X't = totaldays_per_poll if (totalpolls_per_day == 1 & day == td(`X'nov2016))
    * Assign that total to every observation for the person, so we can use it as a condition.
    gegen totaldays_per_poll_day`X' = max(totaldays_per_poll_day`X't), by(ID_11_16)
    drop totaldays_per_poll_day`X't
    * Now, apply the two conditions to make the likely voter filter
    gen likelyvoter_v1_d`X' = (totalpolls_per_day`X' == 1 & totaldays_per_poll_day`X' == 1)
}

* Interactive spot check, disabled so the do-file can run unattended. Run it by hand when
* debugging:
* browse ID_11_16 PollingPlace_ID totalpolls_per_day totaldays_per_poll likelyvoter_v1_d* day

* Reduce to one row per individual and save.
* Every likelyvoter flag is constant within a person at this point (the v1 flags are built from
* person-level maxima; a v2 flag can only be 1 for a person with a single row), so keeping one
* tagged row per ID_11_16 loses no information.
keep ID_11_16 likelyvoter*
* gegen (rather than egen) for consistency with the rest of the file.
gegen tag_ID = tag(ID_11_16)
keep if tag_ID == 1
drop tag_ID
* Summary of the flags goes to the log.
sum likelyvoter*

sort ID_11_16
save "${data_dir}/likelyvoters.dta", replace

***************************************************************************************************
*  2. Repeat for each radius option (10m to 100m) on Election Day
***************************************************************************************************
* For each distance cutoff, rebuild the Election Day (08nov2016) likely voter flags from the raw
* pings and save them to likelyvoters_rad<cutoff>.dta. The steps mirror Section 1, restricted to
* day 08. The identifier is written out in full (ID_11_16) throughout so nothing depends on
* variable-name abbreviation.
foreach rad in 10 20 30 40 50 60 70 80 90 100 {
    * Keep only pings whose distance to the polling place is at most the current cutoff.
    use ID_11_16 PollingPlace_ID day local_date_sec Sec_Since_Last Sec_Till_Next_Ping ///
      Dist_to_PollingPlace_M if Dist_to_PollingPlace_M <= `rad' ///
      using "${data_dir}/Pings_all_days.dta", clear

    * Construct upperbound wait time measures for every Individual / Polling Place / Day combination
    * (local_date_sec is divided by 1000 to get seconds; presumably milliseconds -- TODO confirm)
    gegen double earliestping = min(local_date_sec), by(ID_11_16 PollingPlace_ID day)
    gegen double latestping = max(local_date_sec), by(ID_11_16 PollingPlace_ID day)
    gen double lowerbound = (latestping - earliestping)/1000

    * Gap before the first ping, spread to every row of the combination.
    gen double sec_before_earliest_ping1 = Sec_Since_Last if earliestping == local_date_sec
    gegen double sec_before_earliest_ping = max(sec_before_earliest_ping1), ///
      by(ID_11_16 PollingPlace_ID day)
    drop sec_before_earliest_ping1

    * Gap after the last ping, spread to every row of the combination.
    gen double sec_after_latest_ping1 = Sec_Till_Next_Ping if latestping == local_date_sec
    gegen double sec_after_latest_ping = max(sec_after_latest_ping1), ///
      by(ID_11_16 PollingPlace_ID day)
    drop sec_after_latest_ping1

    * Drop combinations whose upper bound is 60 seconds or less, or missing.
    gen double upperbound = lowerbound + sec_before_earliest_ping + sec_after_latest_ping
    drop if upperbound <= 60 | upperbound == .

    * Count the number of poll & day combos a person has across all days
    gegen tag_IDdaypoll = tag(ID_11_16 day PollingPlace_ID)
    gegen totalpolldaycount = sum(tag_IDdaypoll), by(ID_11_16)

    * To keep the data manageable in size, keep one row per ID / Day / Polling Place
    keep if tag_IDdaypoll == 1

    * Strict version (v2): the person's only poll/day combination is on Election Day.
    gen likelyvoter_v2_d08 = (totalpolldaycount == 1 & day == td(08nov2016))

    * Weak version (v1): exactly one polling place on Election Day, and that polling place
    * is visited on no other day.
    gegen totalpolls_per_day = sum(tag_IDdaypoll), by(ID_11_16 day)
    gegen totaldays_per_poll = sum(tag_IDdaypoll), by(ID_11_16 PollingPlace_ID)

    gen totalpolls_per_day08t = totalpolls_per_day if day == td(08nov2016)
    gegen totalpolls_per_day08 = max(totalpolls_per_day08t), by(ID_11_16)
    drop totalpolls_per_day08t
    gen totaldays_per_poll_day08t = totaldays_per_poll if (totalpolls_per_day == 1 & ///
      day == td(08nov2016))
    gegen totaldays_per_poll_day08 = max(totaldays_per_poll_day08t), by(ID_11_16)
    drop totaldays_per_poll_day08t
    gen likelyvoter_v1_d08 = (totalpolls_per_day08 == 1 & totaldays_per_poll_day08 == 1)

    * Interactive spot check, disabled so the loop can run unattended (it would otherwise
    * fire once per cutoff). Run it by hand when debugging:
    * browse ID_11_16 PollingPlace_ID totalpolls_per_day totaldays_per_poll likelyvoter_v1_d* day

    * One row per individual (the flags are constant within a person), then save.
    keep ID_11_16 likelyvoter*
    gegen tag_ID = tag(ID_11_16)
    keep if tag_ID == 1
    drop tag_ID
    sum likelyvoter*
    sort ID_11_16
    save "${data_dir}/likelyvoters_rad`rad'.dta", replace
}

* Close the log opened at the top of the script.
log close

